This online ressource is there to provide additional interactive visualization tool for the Varroa mites sampling and sequencing data obtained from the associated paper “XXXX”. A Snakemake pipeline created and used for genomics mapping, variant calling and analysis is provided in this host GitHub repository. Most genomics and demographic analysis using fastsimcoal2 were computed on the Okinawa Institute of Science and Technology OIST cluster, Sango.

Click on “code” button on the right to make appear the libraries used and R code for each section.

setwd("~/Dropbox (OIST)/varroa-host-switch-demo/R_Markdown")
library("tidyverse")
library("knitr") # for the markdown
library("kableExtra") # for creating a scrolling table
library("ggplot2") # for plotting 
library("maps")
library("leaflet")
library("htmltools")
library("rgdal")
library("gridExtra")
library("gghighlight")
library("adegenet")
library("vcfR")
library("ggmap")
library("poppr")
library("RColorBrewer")
library("gridExtra")
library("igraph")
library("StAMPP")
library("lattice")
library("reshape2")
library("ggrepel") # for text labels
library("pcadapt")
library("data.table")
library("ggthemes")
library("ggpubr")
library("boot")
library("plotly")
library("forcats")
library("plotly")

1. Distribution maps of the Varroa mites sampled

Here, we have 43 samples of Varroa mites for which we have sequenced the whole genome succesfully. We use the leaflet package to create an interactive map showing the location and from which host, each Varroa mite was collected.

ID = Name code of the samples obtained from the CSIRO varroa collection
Species = Identified species according to mtDNA of the mtDNA COX1 partial gene
Host collected = Varroa mites were collected from within honey bee colonies identified by the collector as either Western honey bee Apis mellifera or Eastern honey bee Apis cerana.
Location/Country/Date = Exact informations on sampling
Site = Code name of each sampling site corresponding on the map Approximate.X/Y = GPS coordinates from samples obtained before 2008 are infered from approximate location given. All samples geo-coordinates after 2008 were obtained directly from field collection.

COI_haplogroup = mtDNA haplogroup identified from mtDNA COX1 partial region (reconstruct from HiSeq4000 reads and confirmed by Sanger sequencing).
Haplotype = mtDNA haplotype name by concatenating four genes (COX1, COX3, ATP6 and ND2)

Host_read = Host DNA identified by number of reads mapped against either A. cerana or A. mellifera
reads_mellifera/cerana = Number of reads mapped against reference genome of A. cerana or A. mellifera (Q > 20)

# Import the metadata for the Varroa samples 
metadata <- read.csv("R_data/metadata_varroa_asia_15052020.csv", header = TRUE)
#head(metadata)

## Create a scrolling table
kable(cbind(metadata)) %>%
  kable_styling(bootstrap_options = "striped", font_size = 9)  %>%
  scroll_box(width = "100%", height = "400px") #%>%
ID SEQ_ID Species Host_collected Location Country Date Site Approximate.X Approximate.Y COI_haplogroup Haplotype Host_read reads_cerana reads_mellifera Mean_Read_Depth Total_reads Mapped Mapping_rate Mapped_in_pair Platform
V212 VD212 V. destructor A. cerana Gwangju, Jeonlabug Province South Korea 01/1996 KOR02 35.159540 126.85260 VD Korea K1 VD Korea K1-1/K1-2 cerana 56 0 16.6 66272622 41762260 63.0 38388417 HiSeq 4000
V642_1 VD642_1 V. destructor A. cerana Zhuhai county, Guangdong Province China 11/2001 CHN08 22.614010 112.63183 VD China C1 VD China C1-3 cerana 2857 7 47.1 119107269 116682672 98.0 112230978 HiSeq 4000
V657_1 VD657_1 V. destructor A. cerana Zhongshan, Guangdong Province China 04/2002 CHN10 23.946090 114.69726 VD China C1 VD China C1-3 cerana 46 38 21.5 54511467 53363635 97.9 49849866 HiSeq 4000
V651_1 VD651_1 V. destructor A. cerana Dayao county, Yunnan Province China 04/2002 CHN05 25.729510 101.33661 VD China C3 VD China C3-2 cerana 167 6 28.2 71261265 69804599 98.0 66308649 HiSeq 4000
V577_1 VD577_1 V. destructor A. cerana Yunnan Agricultural University, Kunming, Yunnan Province China 05/2002 CHN06 25.125650 102.74318 VD China C4 VD China C4-2 cerana 125 4 35.0 91721740 89577965 97.7 85757545 HiSeq 4000
V654_1 VD654_1 V. destructor A. cerana Xishuangbanna, Yunnan Province China 04/2002 CHN07 22.008810 100.79714 VD Viet Nam V1 VD Viet Nam V1-5 cerana 626 0 25.1 63453379 62161106 98.0 58776768 HiSeq 4000
V658_2 VD658_2 V. destructor A. cerana Xishuangbanna, Yunnan Province China 04/2002 CHN07 22.008810 100.79714 VD Viet Nam V1 VD Viet Nam V1-6 cerana 2693 8 15.0 44339428 38654491 87.2 35187264 HiSeq 4000
V676_2 VD676_2 V. destructor A. cerana Chiang Mai University Thailand 01/2003 THA01 18.805960 98.95336 VD Viet Nam V1 VD Viet Nam V1-7 cerana 366 10 5.9 29358273 19254785 65.6 17719549 HiSeq 4000
V149 VD149 V. destructor A. cerana Phjong, Ninh Binh Province Viet Nam 10/1996 VNM01 21.181970 106.00161 VD Viet Nam V1 VD Viet Nam V1-8 cerana 1773 76 10.7 46623619 29994005 64.3 26399304 HiSeq 4000
V475_1 VD475_1 V. destructor A. cerana Hoc Chau Viet Nam 11/1998 VNM02 20.922080 104.75209 VD Korea K1 VD Korea K1-1/K1-2 mellifera 6 555 19.3 50827244 48381793 95.2 44842843 HiSeq 4000
V153_2 VD153_2 V. destructor A. mellifera Jungup South Korea 10/1996 KOR01 35.569880 126.85589 VD Korea K1 VD Korea K1-1/K1-2 mellifera 0 5824 22.2 56113114 55021332 98.1 51939126 HiSeq 4000
V159_1 VD159_1 V. destructor A. mellifera Jungup South Korea 10/1996 KOR01 35.569880 126.85589 VD Korea K1 VD Korea K1-1/K1-2 mellifera 14 5707 24.2 64315412 60528716 94.1 55776173 HiSeq 4000
V639_1 VD639_1 V. destructor A. mellifera Heilongjiang Province China 09/2001 CHN01 46.617160 131.15727 VD Korea K1 VD Korea K1-1/K1-2 mellifera 20 1096 17.2 45259621 43071591 95.2 39536470 HiSeq 4000
V625_2 VD625_2 V. destructor A. mellifera Fengman, Lishu Co., Changchun, Jilin Province China 05/2001 CHN02 43.821600 126.56227 VD Korea K1 VD Korea K1-1/K1-2 mellifera 2 217 16.6 46030858 41845105 90.9 37822659 HiSeq 4000
V622_1 VD622_1 V. destructor A. mellifera Apiary of Jinzhong, Yuci, Shanxi Province China 06/2001 CHN03 37.697790 112.70822 VD Korea K1 VD Korea K1-1/K1-2 mellifera 0 1243 13.8 39024603 35070629 89.9 31675460 HiSeq 4000
V641_1 VD641_1 V. destructor A. mellifera Mianzhu County, Sichuan Province China 09/2001 CHN04 31.338070 104.22074 VD Korea K1 VD Korea K1-7 mellifera 6 871 21.0 54493356 52402973 96.2 48743391 HiSeq 4000
V646_1 VD646_1 V. destructor A. mellifera Zhongshan, Guangdong Province China 11/2001 CHN09 23.927990 113.15883 VD Korea K1 VD Korea K1-7 mellifera 4 322 25.8 66768826 64203224 96.2 59914828 HiSeq 4000
V150_2 VD150_2 V. destructor A. mellifera Nho Quan Viet Nam 10/1996 VNM03 20.322630 105.74960 VD Korea K1 VD Korea K1-1/K1-2 mellifera 26 218 33.2 86978010 84333771 97.0 80000889 HiSeq 4000
V474_1 VD474_1 V. destructor A. mellifera Hoc Chau Viet Nam 01/1998 VNM02 20.922080 104.75209 VD Korea K1 VD Korea K1-1/K1-2 mellifera 52 648 24.8 63242091 61585346 97.4 58056817 HiSeq 4000
V028 VJ028 V. jacobsoni A. cerana UPM, Serdang, Kuala Lumpur Malaysia 03/1995 MYS01 2.987890 101.73517 VJ Malaysia 2 VJ Malaysia 2-1 cerana 168 66 50.4 131769709 125151456 95.0 120488777 HiSeq 4000
V780 VJ780 V. jacobsoni A. cerana Chauthanh Village, Bentra District Viet Nam 03/2003 VNM06 10.304830 106.35520 VJ Laos L1 VJ Laos L1-3 cerana 40372 4 15.6 47957642 38953995 81.2 36880992 HiSeq 4000
V787_2 VJ787_2 V. jacobsoni A. cerana Rach Gia Town, Kien Giang Viet Nam 08/2004 VNM04 10.021500 105.09109 VJ Laos L1 VJ Laos L1-4 cerana 2325 28 29.6 78330554 75988942 97.0 72622136 HiSeq 4000
V788_1 (replicate discarded) VJ788_1 V. jacobsoni A. cerana My tho City, Kien Giang Viet Nam 08/2004 VNM05 10.376520 106.34388 VJ Laos L1 VJ Laos L1-4 cerana 1291 2 7.0 43611088 19916649 45.7 18505755 HiSeq 4000
V788_2 VJ788_2 V. jacobsoni A. cerana My tho City, Kien Giang Viet Nam 08/2004 VNM05 10.376520 106.34388 VJ Laos L1 VJ Laos L1-4 cerana 72 2 10.8 30299193 27206968 89.8 25941038 HiSeq 4000
V789_1 VJ789_1 V. jacobsoni A. cerana Giong Trom Town, Ben Tre Viet Nam 08/2004 VNM07 10.157180 106.50445 VJ Laos L1 VJ Laos L1-4 cerana 859 4 23.6 71558906 58473472 81.7 56649683 HiSeq 4000
V333_1 VJ333_1 V. jacobsoni A. cerana Parung Panjang, Java Indonesia 06/1998 IDN01 -6.348140 106.56935 VJ Java 1 VJ Java 1-1 cerana 21 10 17.9 46019459 44634047 97.0 42592178 HiSeq 4000
V341_1 VJ341_1 V. jacobsoni A. cerana Sukabumi, West Java Indonesia 06/1998 IDN02 -6.923870 106.93156 VJ Java 1 VJ Java 1-2 cerana 119 4 24.8 63057170 61695629 97.8 59251392 HiSeq 4000
V347_1 VJ347_1 V. jacobsoni A. cerana Cililin Indonesia 06/1998 IDN04 -6.983450 107.43608 VJ Java 1 VJ Java 1-3 cerana 61 4 18.2 48579482 45809925 94.3 42898293 HiSeq 4000
V363_1 VJ363_1 V. jacobsoni A. cerana Johor, East Java Indonesia 06/1998 IDN05 -7.223850 112.73320 VJ Java 1 VJ Java 1-4 cerana 5115 64 21.3 54026529 52825280 97.8 50723792 HiSeq 4000
V565_2 VJ565_2 V. jacobsoni A. cerana Gunung Arca, Sukabumi Indonesia 04/2002 IDN03 -6.926560 106.95533 VJ Java 1 VJ Java 1-5 cerana 714 28 42.2 107959838 106090376 98.3 102687165 HiSeq 4000
V325_1 VJ325_1 V. jacobsoni A. cerana Kuta, Lombok Indonesia 04/1998 IDN06 -8.880270 116.28361 VJ Lombok 2 VJ Lombok 2-1 cerana 152 0 24.7 62599084 61346630 98.0 58922351 HiSeq 4000
V950_1 VJ950_1 V. jacobsoni A. cerana Aiyura Papua New Guinea 05/1997 PNG01 -6.343340 145.90809 VJ PNG 1 VJ PNG 1-1 cerana 475 68 25.8 66335209 64182154 96.8 61215275 HiSeq 4000
V854_1 VJ854_1 V. jacobsoni A. cerana Goroka Papua New Guinea 05/2008 PNG04 -6.083470 145.38626 VJ PNG 1 VJ PNG 1-1 cerana 5230 12 16.7 44613684 42039605 94.2 39498649 HiSeq 4000
V853_4 VJ853_4 V. jacobsoni A. cerana Daru Papua New Guinea 06/2008 PNG11 -9.078190 143.20997 VJ PNG 1 VJ PNG 1-2 cerana 200 60 10.4 35177854 32209884 91.6 29491225 HiSeq 4000
V983_1 VJ983_1 V. jacobsoni A. cerana Asaro Papua New Guinea 11/2015 PNG05 -6.012000 145.32500 VJ PNG 1 VJ PNG 1-1 cerana 284 102 19.1 50283170 47881277 95.2 44989750 HiSeq 4000
V847 VJ847 V. jacobsoni A. mellifera Wabag, Enga Province Papua New Guinea 06/2008 PNG10 -5.482670 143.66373 VJ PNG 1 VJ PNG 1-1 mellifera 23 115346 22.5 64504980 56218551 87.2 52755225 HiSeq 4000
V852_3 VJ852_3 V. jacobsoni A. mellifera Goroka, Eastern Highlands Province Papua New Guinea 05/2008 PNG04 -6.083470 145.38626 VJ PNG 1 VJ PNG 1-1 mellifera 4 2288 20.9 55816067 54523224 97.7 51390551 HiSeq 4000
V857_2 VJ857_2 V. jacobsoni A. mellifera Benabena, Eastern Highlands Province Papua New Guinea 05/2008 PNG03 -6.128125 145.42904 VJ PNG 1 VJ PNG 1-1 mellifera 20 429 18.8 57477057 46759609 81.4 44551404 HiSeq 4000
V856_1 VJ856_1 V. jacobsoni A. mellifera Henganofi Border, Eastern Highlands Province Papua New Guinea 05/2008 PNG02 -6.260350 145.59681 VJ PNG 1 VJ PNG 1-1 mellifera 15 9803 13.6 58525652 37045620 63.3 31803798 HiSeq 4000
V952_1 VJ952_1 V. jacobsoni A. mellifera Amayufa, Eastern Highlands Province Papua New Guinea 05/2014 PNG06 -5.975581 145.27676 VJ PNG 1 VJ PNG 1-1 mellifera 18 36 22.3 61969612 55519034 89.6 52870625 HiSeq 4000
V953_2 VJ953_2 V. jacobsoni A. mellifera Benabena, Eastern Highlands Province Papua New Guinea 05/2014 PNG03 -6.128125 145.42904 VJ PNG 1 VJ PNG 1-1 mellifera 22 26196 22.3 62054570 56103596 90.4 53032044 HiSeq 4000
V954_2 VJ954_2 V. jacobsoni A. mellifera Windy Lodge Papua New Guinea 06/2014 PNG07 -6.035146 144.96032 VJ PNG 1 VJ PNG 1-1 mellifera 14 159 22.5 58553267 56180797 96.0 52846232 HiSeq 4000
V955_1 VJ955_1 V. jacobsoni A. mellifera Donakanage Papua New Guinea 06/2014 PNG08 -5.882240 144.82077 VJ PNG 1 VJ PNG 1-1 mellifera 15 460 24.7 64865555 61577522 94.9 58553870 HiSeq 4000
V956_4 VJ956_4 V. jacobsoni A. mellifera Minj Papua New Guinea 06/2014 PNG09 -5.826640 144.40741 VJ PNG 1 VJ PNG 1-2 cerana 1444 69 17.1 47824500 45966716 96.1 42702569 HiSeq 4000
Mom #1 Mom #1 V. destructor A. mellifera OIST Ecology and Evolution experimental apiary, Okinawa Japan 02/2018 JPN01 26.456448 127.82386 VD Korea K1 VD Korea K1-1/K1-2 mellifera 0 7448 20.2 52944228 50821794 96.0 52944228 HiSeq 4000
Son #1 Son #1 V. destructor A. mellifera OIST Ecology and Evolution experimental apiary, Okinawa Japan 02/2018 JPN01 26.456448 127.82386 VD Korea K1 VD Korea K1-1/K1-2 mellifera 0 341 19.8 51620028 49266370 95.4 51620028 HiSeq 4000
Mom #2 Mom #2 V. destructor A. mellifera OIST Ecology and Evolution experimental apiary, Okinawa Japan 02/2018 JPN01 26.456448 127.82386 VD Korea K1 VD Korea K1-1/K1-2 mellifera 0 50765 24.8 65220912 62435130 95.7 65220912 HiSeq 4000
Son #2 Son #2 V. destructor A. mellifera OIST Ecology and Evolution experimental apiary, Okinawa Japan 02/2018 JPN01 26.456448 127.82386 VD Korea K1 VD Korea K1-1/K1-2 mellifera 0 422 18.7 48499085 46511492 95.9 48499085 HiSeq 4000
Mom #7 Mom #7 V. destructor A. mellifera OIST Ecology and Evolution experimental apiary, Okinawa Japan 02/2018 JPN01 26.456448 127.82386 VD Korea K1 VD Korea K1-1/K1-2 mellifera 0 563 16.3 43293860 41401282 95.6 43293860 HiSeq 4000
Son #7 Son #7 V. destructor A. mellifera OIST Ecology and Evolution experimental apiary, Okinawa Japan 02/2018 JPN01 26.456448 127.82386 VD Korea K1 VD Korea K1-1/K1-2 mellifera 0 660 18.8 49134877 46899128 95.5 49134877 HiSeq 4000
Mom #15 Mom #15 V. destructor A. mellifera OIST Ecology and Evolution experimental apiary, Okinawa Japan 05/2018 JPN01 26.456448 127.82386 VD Korea K1 VD Korea K1-1/K1-2 mellifera 0 100 22.5 59835146 56138955 93.8 59835146 HiSeq 4000
Son #15 Son #15 V. destructor A. mellifera OIST Ecology and Evolution experimental apiary, Okinawa Japan 05/2018 JPN01 26.456448 127.82386 VD Korea K1 VD Korea K1-1/K1-2 mellifera 1 193 21.4 57025270 53922121 94.6 57025270 HiSeq 4000
Mom #17 Mom #17 V. destructor A. mellifera OIST Ecology and Evolution experimental apiary, Okinawa Japan 05/2018 JPN01 26.456448 127.82386 VD Korea K1 VD Korea K1-1/K1-2 mellifera 2 40283 22.1 58014781 55352009 95.4 58014781 HiSeq 4000
Son #17 Son #17 V. destructor A. mellifera OIST Ecology and Evolution experimental apiary, Okinawa Japan 05/2018 JPN01 26.456448 127.82386 VD Korea K1 VD Korea K1-1/K1-2 mellifera 2 949 33.0 87139528 82953771 95.2 87139528 HiSeq 4000
Mom #19 Mom #19 V. destructor A. mellifera OIST Ecology and Evolution experimental apiary, Okinawa Japan 05/2018 JPN01 26.456448 127.82386 VD Korea K1 VD Korea K1-1/K1-2 mellifera 0 24181 18.1 47748745 45343415 95.0 47748745 HiSeq 4000
Son #19 Son #19 V. destructor A. mellifera OIST Ecology and Evolution experimental apiary, Okinawa Japan 05/2018 JPN01 26.456448 127.82386 VD Korea K1 VD Korea K1-1/K1-2 mellifera 5 498 24.5 65304925 61543197 94.2 65304925 HiSeq 4000
  #row_spec(1:23, bold = T, color = "white", background = "#E60000") %>%
  #row_spec(24:39, bold = T, color = "white", background = "#FF9999")  %>%
  #row_spec(40:74, bold = T, color = "white", background = "#0000FF")  %>%
  #row_spec(75:96, bold = T, color = "white", background = "#3399FF")

# Download the country borders layer
#download.file("http://thematicmapping.org/downloads/TM_WORLD_BORDERS_SIMPL-0.3.zip" , destfile="world_shape_file.zip")
#system("unzip world_shape_file.zip")
world_spdf=readOGR(dsn= getwd() , layer="TM_WORLD_BORDERS_SIMPL-0.3")
## OGR data source with driver: ESRI Shapefile 
## Source: "/Users/alphamanae/Dropbox (OIST)/varroa-host-switch-demo/R_Markdown", layer: "TM_WORLD_BORDERS_SIMPL-0.3"
## with 246 features
## It has 11 fields
## Integer64 fields read as strings:  POP2005
data.vdac <- metadata %>% filter(Species == "V. destructor") %>% filter(Host_collected == "A. cerana") 

data.vdam <- metadata %>% filter(Species == "V. destructor") %>% filter(Host_collected == "A. mellifera")

data.vjac <- metadata %>% filter(Species == "V. jacobsoni") %>% filter(Host_collected == "A. cerana") 

data.vjam <- metadata %>% filter(Species == "V. jacobsoni") %>% filter(Host_collected == "A. mellifera") 

The following interactive map showed the distribution of both V. destructor and V. jacobsoni samples on either their original host the eastern honey bee A. cerana or on the new host the western honey bee A. mellifera.

If you pass your mouse over a particular point, a pop-up will appear presenting essential information about the particular Varroa mite sample. On the right top corner, a layer control button allow to subselect species/host couple.

V. destructor on A. cerana
V. destructor on A. mellifera
V. jacobsoni on A. cerana
V. jacobsoni on A. mellifera

## Create the interactive map with leaflet
leaflet(metadata) %>% 
  addTiles(group = "OSM (default)") %>%
  ## add the layer other than default we would like to use for background
  addProviderTiles(providers$CartoDB.PositronNoLabels, group = "Positron NoLabels") %>%
  ## adding the layers with V. destructor
  addCircleMarkers(data = data.vdac, data.vdac$Approximate.Y, data.vdac$Approximate.X,
                   weight = 0.5,
                   col = "#FB0000", 
                   radius = 4, 
                   fillOpacity = 0.9, 
                   stroke = T, 
                   popup = ~paste(sep = "<br/>",
                              paste ("Name: ", as.character(ID)),
                              paste ("Host: ", as.character(Host_collected)),
                              paste ("Date: ", as.character(Date)),
                              paste ("mtDNA: ", as.character(Haplotype))),
                   group = 'V. destructor on A. cerana') %>%

  addCircleMarkers(data = data.vdam, data.vdam$Approximate.Y, data.vdam$Approximate.X,
                   weight = 0.5,
                   col = "#FF9999", 
                   radius = 4, 
                   fillOpacity = 0.9, 
                   stroke = T, 
                   popup = ~paste(sep = "<br/>",
                              paste ("Name: ", as.character(ID)),
                              paste ("Host: ", as.character(Host_collected)),
                              paste ("Date: ", as.character(Date)),
                              paste ("mtDNA: ", as.character(Haplotype))),
                   group = 'V. destructor on A. mellifera') %>%

  ## adding the layers with V. jacobsoni
  addCircleMarkers(data = data.vjac, data.vjac$Approximate.Y, data.vjac$Approximate.X,
                   weight = 0.5,
                   col = "#0000FF", 
                   radius = 4, 
                   fillOpacity = 0.9, 
                   stroke = T, 
                   popup = ~paste(sep = "<br/>",
                              paste ("Name: ", as.character(ID)),
                              paste ("Host: ", as.character(Host_collected)),
                              paste ("Date: ", as.character(Date)),
                              paste ("mtDNA: ", as.character(Haplotype))),
                   group = 'V. jacobsoni on A. cerana') %>%

  addCircleMarkers(data = data.vjam, data.vjam$Approximate.Y, data.vjam$Approximate.X,
                   weight = 0.5,
                   col = "#00CCFF", 
                   radius = 4, 
                   fillOpacity = 0.9, 
                   stroke = T,    
                   popup = ~paste(sep = "<br/>",
                              paste ("Name: ", as.character(ID)),
                              paste ("Host: ", as.character(Host_collected)),
                              paste ("Date: ", as.character(Date)),
                              paste ("mtDNA: ", as.character(Haplotype))),
                   group = 'V. jacobsoni on A. mellifera') %>%

    ## adding the control button to remove or add layers of points 
  addLayersControl(position = "topright",
    baseGroups = c("OSM (default)", "Positron NoLabels"),
    overlayGroups = c("V. destructor on A. cerana", 
                      "V. destructor on A. mellifera", 
                      "V. jacobsoni on A. cerana", 
                      "V. jacobsoni on A. mellifera"), 
    options = layersControlOptions(collapsed = TRUE))  %>%
  ## show the positron background prerably to the OSM layer
  showGroup("Positron NoLabels")

2. Reads and mapping statistics for Varroa libraries

Here we plot the mean read depth computed from each .bam file mapped to the reference vdes3.0.fasta using samtools depth -a FILE.bam | awk '{c++;s+=$3}END{print s/c}' in relation to the mapping percentage.

As you can see by moving your pointer on the graph, the individual V788_1 has been removed from the analysis and replaced by the replicate V788_2 with much better reads statistics.

raichu <- ggplot(metadata, aes(x=Mapping_rate, y = Mean_Read_Depth, col=Species, shape=Host_collected, label = ID))
mycol = c("red2", "blue2")
raichu <- raichu + geom_point(size = 3)
raichu <- raichu + scale_color_manual(values = mycol)
raichu <- raichu + scale_size_manual(values = mycol)
raichu <- raichu +  theme_calc()
raichu <- raichu +  xlab("Mapping rate to Vdes_3.0 reference genome(%)")
raichu <- raichu +  ylab("Mean average read depth")
raichu <- raichu +  ylim(0,20)
#raichu

## make an interactive version of the scatter plot
intraichu <- ggplotly(raichu)
intraichu

We highlight the mean read depth regarding the general distribution, where is each group of individuals “species per host” located in it.

evoli <- ggplot(metadata, aes(Mean_Read_Depth, fill = Species))
evoli <- evoli + geom_histogram(bins = 44) 
evoli <- evoli + gghighlight() 
evoli <- evoli + facet_wrap(~Species~Host_collected)
evoli <- evoli + scale_fill_manual(values = c("red", "blue"))
evoli <- evoli + theme_classic()
evoli <- evoli + labs(x = "Mean Read Depth using NextGenMap", y = "Number of samples")
evoli <- evoli + ggtitle("Read depth does not appear as unbalanced accross species and host")
evoli

Following is a plot of the total number of reads generated per individual and the proportion of reads mapped onto the reference.

togepi <- ggplot(metadata, aes(x=ID, label = Mapping_rate))
togepi <- togepi + geom_bar(aes(weight=Total_reads), fill="black", position="dodge")
togepi <- togepi + geom_bar(aes(weight=Mapped), fill="#1d96f2", position="dodge")
togepi <- togepi + theme(axis.text.x = element_text(angle = 90, hjust = 1))
togepi <- togepi + labs(x = "Individuals", y = "Number of reads")
togepi <- togepi + ggtitle("Proportion of reads mapped against reference using NextGenMap")
togepi

3. Variation in the body size

Before proceeding to DNA extraction with a destructive method, we measured the body size of each Varroa mite whenever possible on both dorsal and ventral view. We measured seven morphological characters but retained only the body width and length for comparison here between Varroa species and their associated honey bee hosts.

The scatterplot also shows the average values obtained by Anderson and Trueman (2000) Exp. App. Acar. for V. destructor, V. jacobsoni and undetermined mites from Luzon and Mindanao.

# Table with 512 morpho data for the moment
morpho <- read.csv("R_data/Varroa-morpho-subset.csv", header = TRUE)
#kable(cbind(morpho)) %>%
#  kable_styling(bootstrap_options = "striped", font_size = 10) %>%
#  scroll_box(width = "100%", height = "400px")

morpho %>% 
  plot_ly(x = ~BW_V, y = ~BL_V,
          color = ~Species,
          symbol = ~Host,
          hoverinfo = "text",
          text = ~paste("Name: ", Sample.ID, "<br>",
                        "Species: ", Species, "<br>",
                        "Host: ", Host, "<br>",
                        "Country: ", Country.Island)) %>%
  add_markers(colors = c("indianred2", "red3", "skyblue2", "blue2", "black"), 
              symbols = c( "triangle-up", "circle", "x"), 
              size = 3) %>%
  layout(xaxis = list(title = "Body width (mm)"),
         yaxis = list(title = "Body length (mm)"),
         title = "Variation in body size measured from ventral view")
morpho %>% 
  plot_ly(x = ~BW_D, y = ~BL_D,
          color = ~Species,
          symbol = ~Host,
          hoverinfo = "text",
          text = ~paste("Name: ", Sample.ID, "<br>",
                        "Species: ", Species, "<br>",
                        "Host: ", Host, "<br>",
                        "Country: ", Country.Island)) %>%
  add_markers(colors = c("indianred2", "red3", "skyblue2", "blue2", "black"), 
              symbols = c( "triangle-up", "circle", "x"), 
              size = 3) %>%
  layout(xaxis = list(title = "Body width (mm)"),
         yaxis = list(title = "Body length (mm)"),
         title = "Variation in body size measured from dorsal view")

STATS TEST?

4. Genetic diversity on 43 samples and two species confonded

2D and 3D PCAs on ~ 1.4 million filtered biallelic SNPs

After formatting the variant call file .vcf into a plink format by renaming each chromosome (e.g., NW_019211454.1 by 1, NW_019211455.1 by 2, …), and applying the option pca, we obtained the two files .eigenval and .eigenvec.

The following 2D and 3D PCAs are based on the ~1.4 million biallelic SNPs with a minor allelic frequency higher than 5%.

pca.all<- read_table2("R_data/43indplink.eigenvec", col_names = FALSE)
eigenval.all <- scan("R_data/43indplink.eigenval")

## Here the first two columns contains the individuals names as we did not specify it in plink earlier
# We remove the first column and assign the new name of column 1
pca.all <- pca.all[,-1]
names(pca.all)[1] <- "ID_NAMES"

# Rename the columns for PCA axis 
names(pca.all)[2:ncol(pca.all)] <- paste0("PC", 1:(ncol(pca.all)-1))

## Joint both table by using the FILE_ID as common parameters
metapca.all <- left_join(pca.all, metadata, by = c("ID_NAMES" = "SEQ_ID"))

### PLOTS
# first convert to percentage variance explained
pve <- data.frame(PC = 1:20, pve = eigenval.all/sum(eigenval.all)*100)

# make Eigenplot
a <- ggplot(pve, aes(PC, pve)) + geom_bar(stat = "identity")
a + ylab("Percentage variance explained") + theme_light()

metapca.all %>% 
  plot_ly(x = ~PC1, y = ~PC2,
          color = ~Species,
          symbol = ~Host_collected,
          hoverinfo = "text",
          text = ~paste("Name: ", ID, "<br>",
                        "Species: ", Species, "<br>",
                        "Host: ", Host_collected, "<br>",
                        "Country: ", Country)) %>%
  add_markers(colors = c("red3", "blue2"), 
              symbols = c( "triangle-up", "circle"), 
              size = 4)%>%
  layout(title = "Varroa mites genetically differentiate between hosts in both species")
metapca.all %>% 
  plot_ly(x = ~PC1, y = ~PC2, z = ~PC3,
          color = ~Country,
          hoverinfo = "text",
          text = ~paste("Name: ", ID, "<br>",
                        "Species: ", Species, "<br>",
                        "Host: ", Host_collected, "<br>",
                        "Country: ", Country,
                        "Haplogroup: ", COI_haplogroup)) %>%
  add_markers(colors = c("red3", "purple", "blue2", "green", "orange", "brown", "pink"), 
              size = 10) %>%
  layout(title = "Mites on A. cerana genetically differs within the native range")
metapca.all %>% 
  plot_ly(x = ~PC1, y = ~PC2, z = ~PC3,
          color = ~COI_haplogroup,
          hoverinfo = "text",
          text = ~paste("Name: ", ID, "<br>",
                        "Species: ", Species, "<br>",
                        "Host: ", Host_collected, "<br>",
                        "Country: ", Country,
                        "Haplogroup: ", COI_haplogroup)) %>%
  add_markers(colors = c("red3", "purple", "blue2", "green", "orange", "brown", "pink"), 
              size = 10) %>%
  layout(title = "Mitochondrial lineages can be a pre-indicator of whole genome differentiation")

2D and 3D PCAs on ~ 200,000 biallelic SNPs pruned for LD

Same idea as the previous PCA, except that only 236,713 SNPs were kept to reduce the effect of linkage disequilibrium.

We can see that even with a smaller reduce number of SNPs, Varroa mites species are genetically distinct and

pca.allLD<- read_table2("R_data/43ind-ldpruned.eigenvec", col_names = FALSE)
eigenval.allLD <- scan("R_data/43ind-ldpruned.eigenval")

## Here the first two columns contains the individuals names as we did not specify it in plink earlier
# We remove the first column and assign the new name of column 1
pca.allLD <- pca.allLD[,-1]
names(pca.allLD)[1] <- "ID_NAMES"

# Rename the columns for PCA axis 
names(pca.allLD)[2:ncol(pca.allLD)] <- paste0("PC", 1:(ncol(pca.allLD)-1))

## Joint both table by using the FILE_ID as common parameters
metapca.allLD <- left_join(pca.allLD, metadata, by = c("ID_NAMES" = "SEQ_ID"))

### PLOTS
# first convert to percentage variance explained
pve <- data.frame(PC = 1:20, pve = eigenval.allLD/sum(eigenval.allLD)*100)

# make Eigenplot
a <- ggplot(pve, aes(PC, pve)) + geom_bar(stat = "identity")
a + ylab("Percentage variance explained") + theme_light()

metapca.allLD %>% 
  plot_ly(x = ~PC1, y = ~PC2,
          color = ~Species,
          symbol = ~Host_collected,
          hoverinfo = "text",
          text = ~paste("Name: ", ID, "<br>",
                        "Species: ", Species, "<br>",
                        "Host: ", Host_collected, "<br>",
                        "Country: ", Country)) %>%
  add_markers(colors = c("red3", "blue2"), 
              symbols = c( "triangle-up", "circle"), 
              size = 4) %>%
  layout(title = "~200,000 LD pruned SNPs still allowed to see host genetic divergence")
metapca.allLD %>% 
  plot_ly(x = ~PC1, y = ~PC2, z = ~PC3,
          color = ~Country,
          hoverinfo = "text",
          text = ~paste("Name: ", ID, "<br>",
                        "Species: ", Species, "<br>",
                        "Host: ", Host_collected, "<br>",
                        "Country: ", Country,
                        "Haplogroup: ", COI_haplogroup)) %>%
  add_markers(colors = c("red3", "purple", "blue2", "green", "orange", "brown", "pink"), 
              size = 10) %>%
  layout(title = "3D PCA color-coded by country")
metapca.allLD %>% 
  plot_ly(x = ~PC1, y = ~PC2, z = ~PC3,
          color = ~COI_haplogroup,
          hoverinfo = "text",
          text = ~paste("Name: ", ID, "<br>",
                        "Species: ", Species, "<br>",
                        "Host: ", Host_collected, "<br>",
                        "Country: ", Country,
                        "Haplogroup: ", COI_haplogroup)) %>%
  add_markers(colors = c("red3", "purple", "blue2", "green", "orange", "brown", "pink"), 
              size = 10) %>%
  layout(title = "3D PCA color-coded by mtDNA COX1 haplogroup")